# package
# library() stops immediately when a package is missing; require() would only
# warn and return FALSE, so the script would fail later at the first call that
# needs the package.
library("readr")
library("dplyr")
library("tidyr")
library("stringr")
library("survey")
library("reshape2")
library("scales")
library("ggplot2")
library("ggpubr")
# default ggplot2 theme for every figure drawn below
theme_set(theme_bw(base_size = 12))

# set working directory
# Run the script from the directory that holds the input files, or uncomment
# the next line and fill in that directory. (A bare `setwd()` stops with
# 'argument "dir" is missing, with no default'.)
# setwd("path/to/data")

# load
# DATE is read as text so the recorded wall-clock time is parsed exactly once,
# directly in Japan time, instead of round-tripping an already parsed
# date-time through as.character() (whose text form may not match the fixed
# "%H:%M:%S" format below).
votematch <- read_csv(
  "2022VOTEMATCH20220812.csv",
  locale = locale(encoding = "SJIS"),
  col_types = cols(DATE = col_character())
)
votematch$DATE <- as.POSIXct(
  votematch$DATE, format = "%Y-%m-%d %H:%M:%S", tz = "Japan"
)
# keep responses recorded up to 20:00:00 on 10 July 2022 (Japan time)
votematch <- votematch[
  votematch$DATE <= as.POSIXct("2022-07-10 20:00:00", tz = "Japan"),
]
# keep respondents whose SEX is coded 1 or 2 and whose AGE is coded 2 to 9
votematch <- votematch[votematch$SEX %in% c(1, 2) & votematch$AGE %in% 2:9, ]

# recode
## period
# Bin every response by the calendar day (Japan time) on which it was
# recorded; anything not on or after 8 July falls into the first bin.
jul08 <- as.POSIXct("2022-07-08 00:00:00", tz = "Japan")
jul09 <- as.POSIXct("2022-07-09 00:00:00", tz = "Japan")
jul10 <- as.POSIXct("2022-07-10 00:00:00", tz = "Japan")
votematch$PERIOD <- factor(
  # conditions are checked top-down, so the latest day has to come first
  case_when(
    votematch$DATE >= jul10 ~ "Jul. 10",
    votematch$DATE >= jul09 ~ "Jul. 9",
    votematch$DATE >= jul08 ~ "Jul. 8",
    TRUE ~ "- Jul. 7"
  ),
  levels = c("- Jul. 7", "Jul. 8", "Jul. 9", "Jul. 10")
)
table(votematch$PERIOD, useNA = "always")
## prefecture
# Prefecture names listed in code order: element k is the name for code k.
prefecture.names <- c(
  "Hokkaido", "Aomori", "Iwate", "Miyagi",
  "Akita", "Yamagata", "Fukushima", "Ibaraki",
  "Tochigi", "Gunma", "Saitama", "Chiba",
  "Tokyo", "Kanagawa", "Niigata", "Toyama",
  "Ishikawa", "Fukui", "Yamanashi", "Nagano",
  "Gifu", "Shizuoka", "Aichi", "Mie",
  "Shiga", "Kyoto", "Osaka", "Hyogo",
  "Nara", "Wakayama", "Tottori", "Shimane",
  "Okayama", "Hiroshima", "Yamaguchi", "Tokushima",
  "Kagawa", "Ehime", "Kochi", "Fukuoka",
  "Saga", "Nagasaki", "Kumamoto", "Oita",
  "Miyazaki", "Kagoshima", "Okinawa"
)
# Splice the code -> name pairs into recode(), then fix the level order so
# tables and plots follow the code order rather than the alphabet.
votematch$PREFECTURE_NAME <- factor(
  recode(
    votematch$PREFECTURE,
    !!!setNames(prefecture.names, seq_along(prefecture.names))
  ),
  levels = prefecture.names
)
table(votematch$PREFECTURE_NAME, useNA = "always")
## partisanship
# 0/1 indicators built from the PARTY code. The `==` test leaves a missing
# PARTY as NA in LDP, whereas %in% turns it into 0 in the two opposition flags.
votematch$LDP <- as.numeric(votematch$PARTY == 1)
votematch$LEFT_OPP <- as.numeric(votematch$PARTY %in% c(2, 6, 7, 8))
votematch$RIGHT_OPP <- as.numeric(votematch$PARTY %in% c(4, 5, 9))
## age
# collapse the eight AGE codes into five groups
age.groups <- c(
  `2` = 1, `3` = 1, `4` = 2, `5` = 3, `6` = 4, `7` = 5, `8` = 5, `9` = 5
)
votematch$AGE2 <- recode(votematch$AGE, !!!age.groups)
table(votematch$AGE2, useNA = "always")
## political attitude
# For both items: code 99 becomes missing, then the scale is reversed (x -> 6 - x).
for (item in c("Q1", "Q18")) {
  answer <- votematch[[item]]
  answer[answer == 99] <- NA
  votematch[[item]] <- 6 - answer
}

# weighted mean
# For every period x prefecture cell, rake the VAA subsample to that
# prefecture's population shares by SEX and by AGE2, then store the weighted
# means (and standard errors) of LDP, LEFT_OPP, RIGHT_OPP and Q18.
population <- read_csv(
  "SUBGROUP_POPULATION.csv", locale = locale(encoding = "SJIS")
)
population <- rename(population, AGE2 = AGE)
period <- c("- Jul. 7", "Jul. 8", "Jul. 9", "Jul. 10")
# prefecture names in code order: element k is the name for code k
prefecture.names <- c(
  "Hokkaido", "Aomori", "Iwate", "Miyagi",
  "Akita", "Yamagata", "Fukushima", "Ibaraki",
  "Tochigi", "Gunma", "Saitama", "Chiba",
  "Tokyo", "Kanagawa", "Niigata", "Toyama",
  "Ishikawa", "Fukui", "Yamanashi", "Nagano",
  "Gifu", "Shizuoka", "Aichi", "Mie",
  "Shiga", "Kyoto", "Osaka", "Hyogo",
  "Nara", "Wakayama", "Tottori", "Shimane",
  "Okayama", "Hiroshima", "Yamaguchi", "Tokushima",
  "Kagawa", "Ehime", "Kochi", "Fukuoka",
  "Saga", "Nagasaki", "Kumamoto", "Oita",
  "Miyazaki", "Kagoshima", "Okinawa"
)
# Collect one small data frame per cell and bind them once after the loop
# instead of growing the result with rbind() on every iteration.
cell.means <- vector("list", length(period) * length(prefecture.names))
cell <- 0L
for (i in seq_along(period)) {
  for (j in seq_along(prefecture.names)) {
    # which() drops rows whose PERIOD or PREFECTURE is NA; a bare logical
    # index would return them as all-NA rows in every cell and inflate the
    # nrow() used for the raking targets below.
    sub.data <- votematch[
      which(votematch$PERIOD == period[i] & votematch$PREFECTURE == j),
    ]
    sub.population <- population[population$PREFECTURE == j, ]
    # svydesign() is called without weights, i.e. the unweighted subsample
    sub.unweighted <- svydesign(ids = ~1, data = sub.data)
    # raking targets: population share of each category x subsample size
    marginal.sex <- aggregate(
      POPULATION ~ SEX, data = sub.population, FUN = sum
    )
    marginal.sex$Freq <- nrow(sub.data) *
      (marginal.sex$POPULATION / sum(sub.population$POPULATION))
    marginal.age <- aggregate(
      POPULATION ~ AGE2, data = sub.population, FUN = sum
    )
    marginal.age$Freq <- nrow(sub.data) *
      (marginal.age$POPULATION / sum(sub.population$POPULATION))
    sub.rake <- rake(
      design = sub.unweighted,
      sample.margins = list(~SEX, ~AGE2),
      population.margins = list(marginal.sex[, c("SEX", "Freq")],
                                marginal.age[, c("AGE2", "Freq")])
    )
    sub.mean <- svymean(
      sub.data[, c("LDP", "LEFT_OPP", "RIGHT_OPP", "Q18")],
      sub.rake, na.rm = TRUE
    )
    sub.mean <- as.data.frame(sub.mean)
    sub.mean$PREFECTURE <- j
    sub.mean$PERIOD <- period[i]
    # Q18 is stored under the label "ORDER", which the attitude comparison
    # further down filters on
    sub.mean$QUESTION <- c("LDP", "LEFT_OPP", "RIGHT_OPP", "ORDER")
    cell <- cell + 1L
    cell.means[[cell]] <- sub.mean
  }
}
weighted.prefecture.mean <- do.call(rbind, cell.means)
rownames(weighted.prefecture.mean) <- seq_len(nrow(weighted.prefecture.mean))
weighted.prefecture.mean$PERIOD <- factor(
  weighted.prefecture.mean$PERIOD, levels = period
)
# PREFECTURE holds the loop index j, so it indexes the name vector directly
weighted.prefecture.mean$PREFECTURE_NAME <-
  prefecture.names[weighted.prefecture.mean$PREFECTURE]
weighted.prefecture.mean <-
  weighted.prefecture.mean[, c("PREFECTURE", "PREFECTURE_NAME",
                               "PERIOD", "QUESTION", "mean", "SE")]
colnames(weighted.prefecture.mean)[5] <- "MEAN"
table(weighted.prefecture.mean$PREFECTURE_NAME, useNA = "always")

# compare
## 2021 HoR Election
# Prefecture-level 2021 HoR percentages, summed into the same three party
# blocs that are used for the VAA partisanship indicators.
hor2021 <- read_csv(
  "PREFECTURE_HOC2022.csv", locale = locale(encoding = "SJIS")
) %>%
  rename(PREFECTURE = PREFECTURE_ID) %>%
  mutate(
    LDP = LDP_PERCENT_HOR2021,
    LEFT_OPP = CDP_PERCENT_HOR2021 + JCP_PERCENT_HOR2021 +
      REIWA_PERCENT_HOR2021 + SDP_PERCENT_HOR2021,
    RIGHT_OPP = JRP_PERCENT_HOR2021 + DPFP_PERCENT_HOR2021 +
      NHK_PERCENT_HOR2021
  )
# one row per prefecture x bloc, so it can be matched on QUESTION
hor2021.long <- hor2021 %>%
  dplyr::select(PREFECTURE_NAME, LDP, LEFT_OPP, RIGHT_OPP) %>%
  gather(key = QUESTION, value = HOR, -PREFECTURE_NAME)
# weighted VAA means (without the SE column) with the election result attached
compare.partisanship <- weighted.prefecture.mean %>%
  dplyr::select(-SE) %>%
  filter(QUESTION %in% c("LDP", "LEFT_OPP", "RIGHT_OPP")) %>%
  rename(VOTEMATCH = MEAN) %>%
  left_join(hor2021.long, by = c("PREFECTURE_NAME", "QUESTION"))
# scatter plot for LDP in the first period; VOTEMATCH is multiplied by 100 on
# the y axis
axis.text.10 <- element_text(size = 10)
compare.partisanship.plot <- compare.partisanship %>%
  filter(PERIOD == "- Jul. 7", QUESTION == "LDP") %>%
  ggplot(aes(x = HOR, y = 100 * VOTEMATCH)) +
  geom_point(color = "gray40") +
  geom_smooth(method = "lm", color = "black") +
  stat_cor(
    label.y = 12, label.x.npc = "left",
    size = 4.0, method = "pearson",
    p.accuracy = 0.05, r.accuracy = 0.01
  ) +
  labs(x = "2021 HoR Election", y = "VAA (- Jul. 7, Weighted)") +
  theme(
    plot.title = element_blank(),
    axis.text.x = axis.text.10,
    axis.title.x = axis.text.10,
    axis.text.y = axis.text.10,
    axis.title.y = axis.text.10
  )
plot(compare.partisanship.plot)
## political attitude
# `mrp.result` is not created anywhere in this script; presumably it comes
# from the .rda file loaded here - confirm.
load("MRP_PREFECTURE_UTAS.rda")
# 2022 rows in long form, rescaled from [0, 1] to [1, 5]
utas.2022 <- mrp.result %>%
  filter(YEAR == 2022) %>%
  gather(key = QUESTION, value = MEAN, -PREFECTURE_NAME) %>%
  mutate(MEAN = rescale(MEAN, to = c(1, 5), from = c(0, 1)))
# weighted VAA means of the "ORDER" item (without the SE column) with the
# UTAS value attached
compare.attitude <- weighted.prefecture.mean %>%
  dplyr::select(-SE) %>%
  filter(QUESTION == "ORDER") %>%
  rename(VOTEMATCH = MEAN) %>%
  left_join(utas.2022, by = c("PREFECTURE_NAME", "QUESTION")) %>%
  rename(UTAS = MEAN)
# scatter plot for the first period, both axes limited to the same range
axis.range <- c(2.5, 3.8)
axis.text.10 <- element_text(size = 10)
compare.attitude.plot <- compare.attitude %>%
  filter(PERIOD == "- Jul. 7") %>%
  ggplot(aes(x = UTAS, y = VOTEMATCH)) +
  xlim(axis.range) + ylim(axis.range) +
  geom_point(color = "gray40") +
  geom_smooth(method = "lm", color = "black") +
  stat_cor(
    label.y = 3.6, label.x.npc = "left",
    size = 4.0, method = "pearson",
    p.accuracy = 0.05, r.accuracy = 0.01
  ) +
  labs(x = "UTAS", y = "VAA (- Jul. 7, Weighted)") +
  theme(
    plot.title = element_blank(),
    axis.text.x = axis.text.10,
    axis.title.x = axis.text.10,
    axis.text.y = axis.text.10,
    axis.title.y = axis.text.10
  )
plot(compare.attitude.plot)
